# Running R commands
# Print a string to the console.
print("hello world")
# Execute the statements stored in a script file; the relative path is
# resolved against the current working directory.
source("hello_world.r")

# Workspace
# Show the current working directory.
getwd()
# Change the working directory.
# NOTE(review): hard-coded, machine-specific path; adjust before running
# on another machine.
setwd("D:/courses/Bioinformatics")
# Save all workspace objects to the file ".RData" in the working directory.
save.image()
# Restore the objects stored in a saved workspace file.
load("D:/courses/Bioinformatics/.RData")
# List the names of the objects currently in the workspace.
ls()
# Remove every object that ls() lists (destructive: empties the workspace).
rm(list=ls())

# Arithmetic operations and numerical functions
# R evaluates expressions typed at top level and prints the result.
2 + 2
exp(-2)

# Simulating
# Fix the random seed so that the draws below are reproducible.
set.seed(2006)
# Five draws from a normal distribution with mean 1.70 and sd 0.10.
rnorm(n = 5, mean = 1.70, sd = 0.10)
# Five draws from a uniform distribution on [0.05, 0.5].
runif(n = 5, min = 0.05, max = 0.5)

# Assignment
# The usual (leftward) assignment operator.
x <- 1
# The rightward form assigns the value on the left to the name on the right.
1 -> x
# Explicit printing ...
print(x)
# ... and automatic printing of a top-level expression.
x

# Classes
# A number has class "numeric".
class(x)
# Re-assigning a string changes the class of x to "character".
x <- "Gene1"
class(x)

# Vectors
# c() combines its arguments into a vector.
y <- c(1,0)
y
z <- c("Female", "Male", "Female", "Female", "Male")
z
# Comparison is element-wise: the result is a logical vector as long as z.
z=="Female"

# Vectorized arithmetic
# Weights and heights of six subjects, stored as parallel vectors.
weight <- c(60, 72, 57, 90, 95, 72)
height <- c(1.75, 1.80, 1.65, 1.90, 1.74, 1.91)
# Arithmetic operators work element by element, so a single expression
# yields one body-mass index per subject without an explicit loop.
bmi <- weight / height^2
bmi

# Graphics
# Scatter plot of weight against height with the default plotting symbol.
plot(x = height, y = weight)
# The same scatter plot drawn with plotting symbol 2 (triangles).
plot(x = height, y = weight, pch = 2)
# Grid of heights along which a reference curve is added to the plot:
# the weight corresponding to a body-mass index of 22.5.
hh <- c(1.65, 1.70, 1.75, 1.80, 1.85, 1.90)
lines(x = hh, y = 22.5 * hh^2)

# Functions
# A function call: positional arguments first, then a named argument.
plot(height, weight, pch=2)
# With parentheses the function is called ...
ls()
# ... without them the function object itself is printed.
ls

# Vector functions
# Functions that summarise a whole vector into a single value.
min(bmi)
mean(bmi)

# Concatenating vectors
# Append a value to an existing vector.
c(y, 0)
# A vector holds a single type, so mixing types forces a coercion:
# numeric combined with character becomes character,
c(y, z)
# logical combined with numeric becomes numeric (TRUE -> 1),
c(y, TRUE)
# and logical combined with character becomes character.
c("Female",FALSE)

# Creating sequences
# Integers from 4 to 9.
seq(from = 4, to = 9)
# From 4 to 10 in steps of 2.
seq(from = 4, to = 10, by = 2)
# Grid of heights in steps of 0.05.
hh <- seq(from = 1.65, to = 1.90, by = 0.05)
# The colon operator is shorthand for a unit-step sequence.
4:9

# Replicating vectors
oops <- 0:2
# Repeat the whole vector three times.
rep(oops, times = 3)
# Repeat the i-th element i times.
rep(oops, times = 1:3)
# Repeat every element three times before moving on to the next one.
rep(oops, each = 3)

# Simulating a die
# Fix the random seed so that the rolls below are reproducible.
set.seed(2006)
# Sampling without replacement: a random permutation of the six faces.
sample(x = 1:6, size = 6)
# Six independent rolls of a fair die.
sample(x = 1:6, size = 6, replace = TRUE)
# Six rolls of a loaded die: faces 1-5 have probability 0.1 each and
# face 6 has probability 0.5.
sample(x = 1:6, size = 6, replace = TRUE, prob = c(rep(0.1, times = 5), 0.5))

# Factors
# A factor stores categorical data as integer codes plus a set of levels.
x1 <- factor(x = c("AA", "AT", "TT", "TT", "AT", "AT", "AA", "TT"))
x1
# The distinct categories of the factor.
levels(x1)

# Matrices
# Build a matrix from a vector; the values fill the matrix column by column.
X <- matrix(data = c("AA", "AT", "GG", "GG"), nrow = 2)
X
# Build the same matrix by binding two vectors together as columns; the
# column names are taken from the variable names.
x1 <- c("AA", "AT")
x2 <- c("GG", "GG")
X <- cbind(x1, x2)
X

# Matrix functions
# Transpose: rows become columns.
t(X)
# Fill a matrix row by row instead of the default column by column.
matrix(c("AA", "AT", "GG", "GG"), nrow=2, byrow=TRUE)
# Number of rows and columns.
dim(X)
# Column and row names (NULL when none are set).
colnames(X)
rownames(X)

# Attributes
# All attributes of the object, returned as a named list.
attributes(X)
# Extract a single attribute from that list by name.
attributes(X)$dim

# Numerical matrix functions
# A 2 x 2 numeric matrix, filled column by column.
A <- matrix(1:4, nrow=2)
A
# With a single matrix argument solve() returns the matrix inverse.
solve(A)

# Lists
# A list can hold components of different types and shapes; here a vector
# and a matrix, each accessible by name.
list(trait=y, genotypes=X)

# Dataframes
# Assemble a small example data set: one row per subject, one column per
# variable. Columns may have different types but must share one length.
ExampleData <- data.frame(
  ID = c(1, 2, 3, 4, 5),
  SNP = c("AA", "AT", "TT", "TT", "AA"),
  Gender = c("Female", "Male", "Female", "Female", "Male"),
  DiseaseStatus = c(1, 1, 0, 0, 0)
)
ExampleData

# Importing data
# Read a tab-separated text file whose first line holds the column names.
# The relative path is resolved against the current working directory, and
# the result replaces any existing ExampleData object.
ExampleData <- read.table("ExampleData.txt", header=TRUE, sep="\t")
ExampleData

# Attaching data
# Columns can always be reached explicitly through the data frame.
ExampleData$ID
ExampleData$SNP
# attach() places a copy of the columns on the search path so that they can
# be referred to by their bare names.
attach(ExampleData)
ID
SNP
# Detach again once the demonstration is done: an attached copy is not
# updated when ExampleData changes, and it can silently shadow (or be
# shadowed by) other objects with the same names for the rest of the session.
detach(ExampleData)

# Exploring data
# Class of the object.
class(ExampleData)
# Column names.
names(ExampleData)
# Number of rows and columns.
dim(ExampleData)

# Tabulating
# Counts per gender.
table(ExampleData$Gender)
# Proportions per gender: each line below divides the counts by the sample
# size, obtained in an increasingly general way.
# Hard-coded sample size (only correct while the data has exactly 5 rows).
table(ExampleData$Gender)/5
# Number of rows taken from the first element of dim().
table(ExampleData$Gender)/dim(ExampleData)[1]
# Number of rows taken from nrow().
table(ExampleData$Gender)/nrow(ExampleData)
# Total of the table itself, so only rows that were actually counted
# contribute to the denominator.
table(ExampleData$Gender)/sum(table(ExampleData$Gender))

# Converting
class(ExampleData$Gender)
# Since R 4.0.0, data.frame() and read.table() keep strings as character
# vectors by default, and as.numeric() on values such as "Female" returns NA
# with a warning. Converting to a factor first gives the integer level codes
# instead; levels are sorted, so "Female" maps to 1 and "Male" to 2. This
# also works unchanged when Gender already is a factor.
GenderNum <- as.numeric(factor(ExampleData$Gender))
GenderNum
# Shift the codes to a 0/1 indicator.
GenderNum - 1
# within() evaluates the expression inside the data frame and returns a
# modified copy: here the Gender column is replaced by its numeric coding.
# The result is only printed; ExampleData itself is left untouched.
within(ExampleData, {
  GenderNum <- as.numeric(factor(Gender))
  rm(Gender)
})

# Subsetting
# First row, all columns.
ExampleData[1,]
# Third column, all rows.
ExampleData[,3]
# Double brackets extract a single component of a list.
list(trait=y, genotypes=X)[[1]]
# Rows 2 and 4, columns 1 to 3.
ExampleData[c(2,4),1:3]
# Negative indices exclude: all rows except 2, 4 and 5.
ExampleData[-c(2,4,5),1:3]
# Logical indexing: keep the rows where the condition holds.
ExampleData[ExampleData$Gender=="Male",]
# Conditions are combined element-wise with &.
ExampleData[ExampleData$Gender=="Male" & ExampleData$DiseaseStatus>0,]
# subset() evaluates the condition inside the data frame, so bare column
# names can be used; select= restricts the columns that are returned.
subset(ExampleData, Gender=="Female")
subset(ExampleData, Gender=="Female", select=c("ID", "SNP"))
# Missing values: is.na() is the way to test for NA.
boo <- c(0,1,NA)
is.na(boo)
# Deliberate counter-example: any comparison with NA yields NA, so this
# returns NA for every element rather than flagging the missing one.
boo==NA

# Stratified tabulating
# Genotype counts computed separately for each disease status ...
table(ExampleData$SNP[ExampleData$DiseaseStatus==1])
table(ExampleData$SNP[ExampleData$DiseaseStatus==0])
# ... or both at once as a two-way table (genotype by disease status).
table(ExampleData$SNP, ExampleData$DiseaseStatus)

# Implicit loops
# Apply table() to the SNP values within each disease-status group.
tapply(ExampleData$SNP, ExampleData$DiseaseStatus, table)
# Apply mean() to every column; sapply() simplifies the result where
# possible, lapply() always returns a list.
# NOTE(review): columns that are not numeric or logical (presumably SNP and
# Gender) make mean() return NA with a warning.
sapply(ExampleData, mean)
lapply(ExampleData, mean)
# A 2 x 3 matrix filled row by row.
m <- matrix(1:6, nrow=2, byrow=TRUE)
m
# Apply sum() over the second dimension, i.e. one sum per column.
apply(m, 2, sum)
# Fix the random seed, then evaluate an expression repeatedly: five times
# the mean of 100 normal draws, and five times the minimum of 100 uniform
# draws.
set.seed(2006)
replicate(5, mean(rnorm(100, 1.70, 0.10)))
replicate(5, min(runif(100, 0.05, 0.5)))

# Sorting
# The sorted values themselves.
sort(ExampleData$Gender)
# The permutation of indices that would sort the values.
order(ExampleData$Gender)
# Use that permutation to reorder another column in the same way.
ExampleData$SNP[order(ExampleData$Gender)]

# Plot layout
# Two independent uniform samples on [0, 2] serve as example coordinates.
set.seed(2006)
x <- runif(n = 50, min = 0, max = 2)
y <- runif(n = 50, min = 0, max = 2)
# A single high-level call draws points, axes, box, titles and labels.
plot(
  x = x,
  y = y,
  main = "Main title",
  sub = "subtitle",
  xlab = "x-label",
  ylab = "y-label"
)
# Low-level calls add to the existing plot: a text label centred on
# (0.6, 0.6) and a horizontal and a vertical reference line through it.
text(x = 0.6, y = 0.6, labels = "text at (0.6,0.6)")
abline(h = 0.6, v = 0.6)

# Building a plot from pieces
# Set up the coordinate system only: no points, no labels, no axes.
plot(x = x, y = y, type = "n", xlab = "", ylab = "", axes = FALSE)
# Then add each element explicitly.
points(x = x, y = y)
# Bottom axis with default tick marks.
axis(side = 1)
# Left axis with tick marks at chosen positions.
axis(side = 2, at = seq(from = 0.2, to = 1.8, by = 0.2))
box()
title(
  main = "Main title",
  sub = "subtitle",
  xlab = "x-label",
  ylab = "y-label"
)

# Combining plots
set.seed(2006)
x <- rnorm(100, 1.70, 0.10)
# First attempt: a density-scaled histogram with the generating normal
# density drawn on top. The y-axis is chosen from the histogram alone, so
# the top of the curve may be cut off.
hist(x, freq = FALSE)
curve(dnorm(x, 1.70, 0.10), add = TRUE)
# Second attempt: compute the histogram without drawing it, then choose a
# y-range that covers both the tallest bar and the peak of the curve.
histo <- hist(x, plot = FALSE)
# The N(1.70, 0.10) density peaks at its mean, so it must be evaluated there
# with the same mean and sd as the curve; dnorm(1.70) alone would be the
# standard normal density at 1.70.
ylimit <- range(0, histo$density, dnorm(1.70, mean = 1.70, sd = 0.10))
hist(x, freq = FALSE, ylim = ylimit)
curve(dnorm(x, 1.70, 0.10), add = TRUE)

# Writing functions
#' Histogram with a fitted normal density curve
#'
#' Draws a density-scaled histogram of `x` and overlays the normal density
#' whose mean and standard deviation are estimated from `x`. The y-axis is
#' extended so that neither the tallest bar nor the peak of the curve is
#' cut off.
#'
#' @param x Numeric vector of observations.
#' @param xlabel Label for the x-axis; defaults to the expression supplied
#'   as `x`.
#' @param ... Further arguments passed on to `hist()` (for example
#'   `breaks`). They are used both for the hidden histogram that determines
#'   the y-range and for the histogram that is drawn, so the two agree.
#' @return The value of `curve()`, invisibly.
hist.with.normal <- function(x, xlabel = deparse(substitute(x)), ...) {
  # hist() silently drops non-finite values; estimate the curve from the
  # same observations so that a stray NA does not break the plot.
  finite_values <- x[is.finite(x)]
  m <- mean(finite_values)
  sdev <- sd(finite_values)

  histo <- hist(x, plot = FALSE, ...)
  # A normal density is highest at its mean, so that is where the height of
  # the curve has to be evaluated.
  peak <- dnorm(m, mean = m, sd = sdev)
  ylimit <- range(0, histo$density, peak)

  hist(x, freq = FALSE, ylim = ylimit, xlab = xlabel, ...)
  curve(dnorm(x, mean = m, sd = sdev), add = TRUE)
}
hist.with.normal(rnorm(200, 1.70, 0.10))
#' Symmetric square root of a symmetric matrix
#'
#' Uses the eigen-decomposition Mat = V diag(lambda) t(V) and returns
#' V diag(sqrt(lambda)) t(V), a symmetric matrix S with S %*% S equal to Mat.
#'
#' @param Mat A symmetric numeric matrix. Its eigenvalues must be
#'   non-negative; negative eigenvalues give NaN entries (with a warning
#'   from sqrt()).
#' @return A matrix with the same dimensions as `Mat`.
sqrt.mat <- function(Mat) {
  e <- eigen(Mat, symmetric = TRUE)
  root_values <- sqrt(e$values)
  # Give nrow explicitly: for a 1 x 1 input diag() would otherwise read the
  # single value as a matrix size and return an identity matrix.
  e$vectors %*% diag(root_values, nrow = length(root_values)) %*% t(e$vectors)
}
A <- matrix(c(1, 0.5, 0.5, 1/3), nrow = 2)
sqrt.mat(A)

# Flow control
# Square root of y by Newton's iteration x <- (x + y/x)/2, written with
# while: the condition is tested before every pass.
y <- 12345
x <- y / 2
while (abs(x^2 - y) > 1e-10) x <- (x + y / x) / 2
x
x^2
# The same iteration with repeat: the body runs at least once and the loop
# is left explicitly with break.
x <- y / 2
repeat {
  x <- (x + y / x) / 2
  if (abs(x^2 - y) < 1e-10) break
}
x
x^2
# The iteration is vectorized: all five roots are refined together, and
# all() turns the element-wise test into the single TRUE/FALSE that if()
# requires.
y <- 1:5
x <- y / 2
repeat {
  x <- (x + y / x) / 2
  if (all(abs(x^2 - y) < 1e-10)) break
}
x
x^2
# for loop over a set of values: add the curves x^2, ..., x^8 to a plot of
# the identity line.
x <- seq(0, 1, 0.05)
plot(x, x, ylab = "y", type = "l")
for (j in 2:8) lines(x, x^j)
# for loop over the positions of a vector, filling a preallocated result.
# seq_along(x) is used instead of 1:length(x) because the latter yields
# c(1, 0) for an empty vector. (Shown to illustrate the loop; y <- x^2 gives
# the same result directly.)
x <- seq(1, 100, 3)
y <- numeric(length(x))
for (i in seq_along(x)) {
  y[i] <- x[i]^2
}

# Statistical distributions
# Quantile function of the chi-squared distribution: the value that is
# exceeded with probability 0.05 when there are 2 degrees of freedom.
qchisq(p = 0.05, df = 2, lower.tail = FALSE)

# Installing packages
# Install a CRAN package only when it is not available yet, so that
# re-running the script does not download it again, then load it.
if (!requireNamespace("genetics", quietly = TRUE)) {
  install.packages("genetics")
}
library(genetics)
# Bioconductor packages are installed through the BiocManager package from
# CRAN; the biocLite.R script and biocLite() have been retired.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install()

# Getting help
# Open the help page of a function whose name is known.
help(read.table)
# Search the installed documentation for a keyword when the name is not
# known.
help.search("read")
